# Required Packages
import pandas as pd
import numpy as np
# preprocessing
from sklearn.impute import SimpleImputer
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex
## progress bar
import progressbar
## seaborn
import seaborn as sns
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('white')
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings("ignore")
![]()
In this article, we investigate a set simulated dataset that mimics customer behavior on the Starbucks rewards mobile app. Starbucks tends to send out offers to users of the mobile app once every few days. These offers are exclusive, that is not all users receive the same offer. An offer can contain a discount for their products or sometimes BOGO (buy one get one free). These offers have a validity period before the offer expires. The article here is inspired by a towardsdatascience.com article.
def Line(N): return N*'='
def Header(Inp, Length = 120):
print(Back.BLACK + Fore.CYAN + Style.NORMAL + '%s' % Inp + Style.RESET_ALL
+ Fore.BLUE + Style.NORMAL + ' %s' % Line(Length- len(Inp) - 1) + Style.RESET_ALL)
def Bottom(Length = 120):
print(Fore.BLUE + Style.NORMAL + '%s' % Line(Length) + Style.RESET_ALL)
# Portfolio Dataset
Header('Portfolio Dataset:')
Portfolio = pd.read_csv('StarBucks/Portfolio_Clean.csv')
display(Portfolio.head().style.hide_index())
# Profile Dataset
Header('Profile Dataset:')
Profile = pd.read_csv('StarBucks/Profile_Clean.csv')
display(Profile.head().style.hide_index())
# Transcript Dataset
Header('Transcript Dataset:')
Transcript = pd.read_csv('StarBucks/Transcript_Clean.csv')
display(Transcript.head().style.hide_index())
Bottom()
User_Data = pd.read_csv('StarBucks/User_Data.csv')
Data = pd.read_csv('StarBucks/Data.csv')
| Feature | Description |
|---|---|
| Reward (int) | Given reward for completing an offer |
| Channels (list of strings) | Email, mobile app, social media, etc |
| Difficulty (int) | Minimum spending requirement for completing an offer |
| Duration (int) | Time that an offer is valid |
| Offer_Type (string) | Type of offer |
| ID (string) | Offer ID |
| Feature | Description |
|---|---|
| Gender (str) | Customers gender |
| Age (int) | Customers age |
| ID (str) | Customers ID |
| Became_Member_On (int) | Date of membership |
| Income (float) | Customer's income |
| Feature | Description |
|---|---|
| Person (str) | Customer ID |
| Event (str) | Record description |
| time (int) | Time in hours (since the beginning of the study) |
| Value - (dict of strings) | Offer ID or transaction amount |
Offer_Type_List = ['Email','Mobile','Social','Web']
Group = Portfolio.groupby('Offer_Type')[Offer_Type_List].sum()
display(Group)
Group = Group.reset_index().melt(id_vars= ['Offer_Type'], value_vars= Group.columns, var_name='Channels', value_name='Count')
Colors = ['LightBlue', 'DeepSkyBlue', 'CornFlowerBlue', 'RoyalBlue', 'MediumBlue', 'Navy']
fig = px.bar(Group, x='Offer_Type', y='Count', orientation='v',
color = 'Channels', barmode='group', color_discrete_sequence= Colors, height= 400)
fig.update_traces(marker_line_color= Colors[-1], marker_line_width=0.8, opacity=1)
fig.update_layout(plot_bgcolor= 'white', width = 700)
fig['layout']['yaxis'].update(range=[0, 5])
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_layout(title={'text': 'Channel Distribution for Various Offers',
'x':0.5, 'y':0.91,
'xanchor': 'center', 'yanchor': 'top'},
xaxis_title='Offer Type')
fig.show()
It seems that social media is the least efficient channel among all.
fig = px.histogram(Profile, x = 'Age', nbins=10, marginal= 'box', color_discrete_sequence= ['LimeGreen'],
hover_data=['Age'])
fig.update_layout(plot_bgcolor= 'white', width = 800)
fig.update_layout(title={'text': 'Age Distribution',
'x':0.5, 'y':0.92,
'xanchor': 'center', 'yanchor': 'top'},
yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'DarkGreen', marker_line_width=1, opacity=1)
fig.add_trace(go.Scatter(x= Profile['Age'].median()* np.ones(6000), y= np.arange(0,6000),
name="Median",
line=dict(color='RoyalBlue', width=2, dash='dot')))
fig.add_trace(go.Scatter(x= Profile['Age'].mean()* np.ones(6000), y= np.arange(0,6000),
name="Mean",
line=dict(color='Red', width=2, dash='dot')))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[0, 6e3])
fig.show()
Costumers are mainly from the age group 50-60.
fig = px.histogram(Profile, x = 'Income', nbins=10, marginal= 'box', color_discrete_sequence= ['LightSkyblue'],
hover_data=['Income'])
fig.update_layout(plot_bgcolor= 'white', width = 800)
fig.update_layout(title={'text': 'Income Distribution',
'x':0.5, 'y':0.92,
'xanchor': 'center', 'yanchor': 'top'},
yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.add_trace(go.Scatter(x= Profile['Income'].median()* np.ones(6000), y= np.arange(0,6000),
name="Median",
line=dict(color='Black', width=2, dash='dot')))
fig.add_trace(go.Scatter(x= Profile['Income'].mean()* np.ones(6000), y= np.arange(0,6000),
name="Mean",
line=dict(color='Red', width=2, dash='dot')))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[0, 5e3])
fig.show()
Most of the customers' income is around 60K.
Temp = Profile.groupby(['Gender'])['Gender'].agg({'count'}).rename(columns = {'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'type':'domain'}]])
Colors = ['LightPink','CornFlowerBlue','LightGreen']
# Left
fig.add_trace(go.Bar(x= Temp['Gender'].values, y= Temp['Count'].values, marker_color= Colors,
textposition='inside', showlegend = False), 1,1)
# Right
fig.add_trace(go.Pie(labels= Temp['Gender'].values,
values= Temp['Count'].values, textfont=dict(size=16),
marker=dict(colors = Colors, line=dict(color='black', width=1))), 1, 2)
# Updates
fig.update_layout(plot_bgcolor= 'white')
fig.update_yaxes(title_text='Average', range=[0, 9e3], row=1, col=1)
fig.update_traces(marker_line_color='black', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()
The number of male customers is larger than the number of female customers.
Temp = Profile.groupby(['Member_Since_Year'])['Member_Since_Year'].agg({'count'}).rename(columns = {'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "xy"}, {'type':'domain'}]])
Colors = ['HoneyDew','GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue']
# Left
fig.add_trace(go.Bar(x= Temp['Member_Since_Year'].values, y= Temp['Count'].values, marker_color= Colors,
textposition='inside', showlegend = False), 1,1)
# Right
fig.add_trace(go.Pie(labels= Temp['Member_Since_Year'].values,
values= Temp['Count'].values, textfont=dict(size=16),
marker=dict(colors = Colors, line=dict(color='black', width=1))), 1, 2)
# Updates
fig.update_layout(plot_bgcolor= 'white')
fig.update_layout(xaxis = dict(tickmode = 'array', tickvals = np.arange(2013, 2019)))
fig.update_yaxes(title_text='Average', range=[0, 7e3], row=1, col=1)
fig.update_traces(marker_line_color='black', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()
Temp = Profile.groupby(['Member_Since_Year','Gender'])['Gender'].agg({'count'}).rename(columns ={'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
Temp['Member_Since_Year'] = Temp['Member_Since_Year'].astype(str)
Temp.columns = [x.replace('_',' ') for x in Temp.columns]
Colors = ['LightPink','CornFlowerBlue','LightGreen']
fig = px.bar(Temp, y= 'Member Since Year', x= 'Percentage', orientation='h',
color = 'Gender', text = 'Percentage', color_discrete_sequence= Colors, hover_data = Temp.columns, height= 450)
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.update_traces(texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[0, 40])
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(title={'text': 'Membership Growth Over Year by Gender',
'x':0.46, 'y':0.92,
'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Assuming the data is a snapshot of the end of 2018. get member tenure by the number of months.
Temp = Profile[['Member_Tenure','Gender']].dropna()
Temp.columns = [x.replace('_',' ') for x in Temp.columns]
Colors = ['LightPink','CornFlowerBlue','LightGreen']
fig = px.histogram(Temp, x = 'Member Tenure', color='Gender', nbins=100, marginal= 'box', color_discrete_sequence= Colors,
hover_data= Temp.columns)
fig.update_layout(plot_bgcolor= 'white')
fig.update_layout(title={'text': 'Member Tenure Distribution by Gender',
'x':0.5, 'y':0.92,
'xanchor': 'center', 'yanchor': 'top'},
yaxis_title='Frequency')
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig['layout']['yaxis'].update(range=[Temp, 1600])
fig['layout']['xaxis'].update(range=[Temp['Member Tenure'].min()-1, Temp['Member Tenure'].max()+1])
fig.show()
Summary_Transcript = pd.DataFrame(Transcript.groupby(['Event'])['Event'].count())
Summary_Transcript.columns = ['Count']
display(Summary_Transcript.T)
data = {'Completed': [100*Summary_Transcript.loc['Offer Completed','Count']/Summary_Transcript.loc['Offer Viewed','Count']],
'Viewed':[100*Summary_Transcript.loc['Offer Viewed','Count']/Summary_Transcript.loc['Offer Received','Count']]}
Group = pd.DataFrame(data = data).round(2).T
Group.columns = ['Percentage']
display(Group.T)
fig = plt.figure(figsize=(13, 5), constrained_layout=True)
gs = fig.add_gridspec(1, 5)
ax0 = fig.add_subplot(gs[:-1])
ax1 = fig.add_subplot(gs[-1])
_ = sns.barplot(ax = ax0, y="Event", x="Count", palette='Purples',
edgecolor='k', hatch="///", data=Summary_Transcript.reset_index(drop = False))
_ = ax0.set_xlim([0, 14e4])
_ = ax0.set_xlabel('Count', fontsize = 14)
_ = sns.barplot(ax = ax1, x="index", y="Percentage", palette='Blues',
edgecolor='k', hatch="///", data=Group.reset_index(drop = False))
_ = ax1.set_xlabel(None, fontsize = 14)
_ = ax1.set_ylim([0,100])
_ = ax1.set_yticks(np.arange(0, 101, 10))
# plt.subplots_adjust(wspace=-0.2)
Temp = Data.groupby(['Event','Offer_Type'])['Offer_Type'].agg({'count'}).rename(columns ={'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
display(Temp)
Temp.reset_index(drop = False, inplace = True)
Temp.columns = [x.replace('_',' ') for x in Temp.columns]
Colors = ['GreenYellow','LightSalmon','LightSkyBlue']
fig = px.bar(Temp, y= 'Offer Type', x= 'Percentage', orientation='h',
color = 'Event', text = 'Percentage', color_discrete_sequence= Colors, hover_data = Temp.columns, height= 350)
fig.update_traces(marker_line_color= 'Navy', marker_line_width=1, opacity=1)
fig.update_traces(texttemplate='%{text:.2}%', textposition='inside')
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[0, 50])
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(title={'text': 'Event by Offer Type',
'x':0.46, 'y':0.92,
'xanchor': 'center', 'yanchor': 'top'})
fig.show()
First off,
Data['Event'].unique().tolist()
Now, let's focus on those customers that completed an offer.
# creating a list of these customers
offer_completed_list = Data.loc[Data['Event'] == 'Offer Completed', 'Person'].tolist()
The journey of one of these customers can be analyzed as well. For example, the first customer from the above list
Customer_Journey = Data.loc[Data.Person == offer_completed_list[0],:].sort_index()
Customer_Journey.head(6).style.hide_index()
To have plots for this part. We need to calulate transactions amount without any offers and The number of transaction without any offers.
# Transactions amount without any offers
User_Data['Tran_Amnt_No_Offer'] = User_Data['Tot_Tran_Amnt']- User_Data['Offer_Trans_Amnt']
# The number of transaction without any offers
User_Data['Trans_Cnt_No_Offer'] = User_Data['Tot_Tran_Cnt']- User_Data['Offer Completed']
# Converting to integers
User_Data[['BOGO_offer','Disc_offer','Info_offer']] = User_Data[['BOGO_offer','Disc_offer','Info_offer']].astype(int)
In the following tables and plots, (BOGO Offer, Discount Offer, Informational Offer) shows that what offer has been used. For example, (1,1,0) means people who respond to BOGO Offers and Discount Offers but not Informational Offers.
def Mean_by_Offer_Type (Columns):
j = Columns[0]
Group = User_Data.groupby(['BOGO_offer','Disc_offer','Info_offer'])[j].mean().reset_index()
for j in Columns[1:]:
Temp = User_Data.groupby(['BOGO_offer','Disc_offer','Info_offer'])[j].mean().reset_index()
Group = Group.merge(Temp, on =['BOGO_offer','Disc_offer','Info_offer'])
del Temp
return Group
Column_List = ['Offer_Tran_Cnt_Ratio', 'Offer_Trans_Amnt_Ratio', 'Offer_Comp_View_Ratio', 'Offer_Comp_Rec_Ratio']
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))
Colors = ['GreenYellow', 'Bisque','LightSalmon','Plum', 'LightSkyBlue']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'Navy', color = Colors, hatch = '//')
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Offer Transactions Count Ratio', 'Offer Transactions Amount Ratio',
'Offer Completed View Ratio', 'Offer Completed Receive Ratio'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,1])
Column_List = ['Offer Completed', 'Tot_Tran_Cnt', 'Trans_Cnt_No_Offer', 'Tot_Rewards_Rec', 'Reward_per_Offer','Difficulty_per_Offer']
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))
Colors = ['Blue', 'Orange', 'DarkGreen','Purple','LightPink', 'Crimson']
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'Navy', color = Colors)
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Offer Completed', 'Total Transaction Count',
'Total Transaction Count (without any offers)', 'Total Rewards Received', 'Reward per Offer',
'Difficulty per Offer'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,20])
_ = ax.set_yticks(np.arange(0, 21, 2))
Column_List = [ 'Tot_Tran_Amnt', 'Offer_Trans_Amnt', 'Tran_Amnt_per_Offer', 'Ave_Tran_Amnt', 'Tran_Amnt_No_Offer']
Group = Mean_by_Offer_Type (Column_List).set_index(['BOGO_offer','Disc_offer','Info_offer'])
display(Group.round(4))
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 7), sharex=False)
_ = Group.plot.bar(ax = ax, rot = 0, edgecolor = 'k',
color = sns.set_palette(["#9b59b6", "#3498db", "#e74c3c", "#34495e","#2ecc71","#95a5a6"]))
_ = ax.set_title('Metric Average of Offer Type Combinations')
_ = ax.legend(('Total Transaction Amount', 'Transaction Amount (with Offers)',
'Transaction Amount per Offer', 'Average Transaction Amount',
'Transaction Amount (without any Offers)'), loc='upper left', fontsize = 12)
_ = ax.set_xlabel('(BOGO Offer, Discount Offer, Informational Offer)')
_ = ax.set_ylim([0,200])
_ = ax.set_yticks(np.arange(0, 201, 20))
We can see from the above plot that the customers who have taken advantage of all three offer types not only spend the most with offers but also have the highest total transaction amount. In other words, the higher amount of incentivization is, the more is spent.
The BOGO offer is the leading offer in terms of having a higher total transaction amount than discount and informational offers. Besides, customers tend to spend more than half of the total transaction amount without offers while using BOGO and discount offers.
We would like to know how the overall offer completion rate, transaction amount ratio motivated by an offer, and reward per offer affected by the user demographics.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(13, 13), sharex=False)
# Average Offer Completion RatioBy Membership Tenure
# [0,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure').mean()['Offer_Comp_Rec_Ratio']
_ = ax[0,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,0].set_xlim(0,70)
_ = ax[0,0].set_ylim(0,1)
_ = ax[0,0].set_xlabel('Member Tenure')
_ = ax[0,0].set_title('Average Offer Completion Ratio\nBy Membership Tenure')
_ = ax[0,0].legend(loc='lower right')
# Average Offer Transaction Amount Ratio By Membership Tenure
# [0,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Offer_Trans_Amnt_Ratio'].mean()
_ = ax[0,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,1].set_xlim(0,70)
_ = ax[0,1].set_ylim(0,1)
_ = ax[0,1].set_xlabel('Member Tenure')
_ = ax[0,1].set_title('Average Offer Transaction Amount Ratio\nBy Membership Tenure')
_ = ax[0,1].legend(loc='lower right')
# Average Reward Per Offer By Membership Tenure
# [0,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Reward_per_Offer'].mean()
_ = ax[0,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,2].set_xlim(0,70)
_ = ax[0,2].set_ylim(0,8)
_ = ax[0,2].set_xlabel('Member Tenure')
_ = ax[0,2].set_title('Average Reward Per Offer\nBy Membership Tenure')
_ = ax[0,2].legend(loc='lower left')
# Average Offer Completion Ratio By Income
# [1,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income').mean()['Offer_Comp_Rec_Ratio']
_ = ax[1,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,0].set_xlim(0,130000)
_ = ax[1,0].set_ylim(0,0.7)
_ = ax[1,0].set_xlabel('Income')
_ = ax[1,0].set_title('Average Offer Completion Ratio\nBy Income')
_ = ax[1,0].legend(loc='lower left')
# Average Offer Transaction Amount Ratio By Income
# [1,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Offer_Trans_Amnt_Ratio'].mean()
_ = ax[1,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,1].set_xlim(0,130000)
_ = ax[1,1].set_ylim(0,0.5)
_ = ax[1,1].set_xlabel('Income')
_ = ax[1,1].set_title('Average Offer Transaction Amount Ratio\nBy Income')
_ = ax[1,1].legend(loc='lower left')
# Average Reward Per Offer By Income
# [1,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Reward_per_Offer'].mean()
_ = ax[1,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,2].set_xlim(0,130000)
_ = ax[1,2].set_ylim(0,12)
_ = ax[1,2].set_xlabel('Income')
_ = ax[1,2].set_title('Average Reward Per Offer By Income')
_ = ax[1,2].legend(loc='lower left')
# Average Offer Completion Ratio By Age
# [2,0]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age').mean()['Offer_Comp_Rec_Ratio']
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age').mean()['Offer_Comp_Rec_Ratio']
_ = ax[2,0].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,0].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,0].set_xlim(0,120)
_ = ax[2,0].set_ylim(0,0.8)
_ = ax[2,0].set_xlabel('Income')
_ = ax[2,0].set_title('Average Offer Completion Ratio By Age')
_ = ax[2,0].legend(loc='upper left')
# Average Offer Transaction Amount Ratio By Age
# [2,1]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Offer_Trans_Amnt_Ratio'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Offer_Trans_Amnt_Ratio'].mean()
_ = ax[2,1].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,1].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,1].set_xlim(0,120)
_ = ax[2,1].set_ylim(0,1)
_ = ax[2,1].set_xlabel('Income')
_ = ax[2,1].set_title('Average Offer Transaction Amount Ratio\nBy Age')
_ = ax[2,1].legend(loc='upper left')
# Average Reward Per Offer By Age
# [2,2]
female_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Reward_per_Offer'].mean()
male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Reward_per_Offer'].mean()
_ = ax[2,2].scatter(female_avg.index, female_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,2].scatter(male_avg.index, male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,2].set_xlim(0,120)
_ = ax[2,2].set_ylim(0,16)
_ = ax[2,2].set_xlabel('Income')
_ = ax[2,2].set_title('Average Reward Per Offer By Age')
_ = ax[2,2].legend(loc='upper left')
_ = plt.tight_layout()
User_Data['BOGO_Conv_Rate'] = User_Data['BOGO_comp']/User_Data['BOGO_Offer_Rec']
User_Data['Disc_Conv_Rate'] = User_Data['Disc_comp']/User_Data['Disc_Offer_Rec']
User_Data['Info_Conv_Rate'] = User_Data['Info_comp']/User_Data['Info_Offer_Rec']
Temp = User_Data[['BOGO_Conv_Rate', 'Disc_Conv_Rate','Info_Conv_Rate']].agg({'mean'}).T
display(Temp.T)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 7), sharex=False)
_ = Temp.plot.bar(ax = ax[0], legend = False, color= 'mediumorchid', edgecolor='indigo', hatch = '///', fontsize = 14,rot=0)
_ = ax[0].set_xticks(np.arange(3))
_ = ax[0].set_xticklabels(['BOGO Offer\nConversion Rate', 'Information Offer\nConversion Rate',
'Discount Offer\nConversion Rate'])
_ = ax[0].set_title('Ave Conversion Rate by Offer', fontsize = 14)
_ = ax[0].set_xlabel('Average', fontsize = 14)
_ = ax[0].set_ylim([0, .5])
_ = ax[0].set_yticks(np.arange(0, .55, 0.05))
_ = Temp.plot.pie(ax = ax[1], y= 'mean', startangle=90, label = '', labels = None,
colors = sns.set_palette(['lightskyblue','limegreen','lightsalmon']),
legend=True, autopct='%1.1f%%', fontsize=14,
pctdistance=0.85, explode = (0.05,0.05,0.05))
_ = ax[1].legend(bbox_to_anchor=(.25, 1), labels= ['BOGO Offer Conversion Rate', 'Information Offer Conversion Rate',
'Discount Offer Conversion Rate'], fontsize = 12)
_ = ax[1].add_artist(plt.Circle((0,0),0.70,fc='white'))
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(13, 13), sharex=False)
# Average BOGO Offer Conversion Rate by Income
# [0,0]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['BOGO_Conv_Rate'].mean()
_ = ax[0,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,0].set_xlim([0,130000])
_ = ax[0,0].set_ylim([0,1])
_ = ax[0,0].set_xlabel('Income')
_ = ax[0,0].set_title('Average BOGO Offer Conversion Rate\nby Income')
_ = ax[0,0].legend(loc='upper left')
# Average Discount Offer Conversion Rate by Income
# [0,1]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Disc_Conv_Rate'].mean()
_ = ax[0,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,1].set_xlim([0,130000])
_ = ax[0,1].set_ylim([0,1])
_ = ax[0,1].set_xlabel('Income')
_ = ax[0,1].set_title('Average Discount Offer Conversion Rate\nby Income')
_ = ax[0,1].legend(loc='upper left')
# Average Information Offer Conversion Rate by Income
# [0,2]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Income')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Income')['Info_Conv_Rate'].mean()
_ = ax[0,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[0,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[0,2].set_xlim([0,130000])
_ = ax[0,2].set_ylim([0,1])
_ = ax[0,2].set_xlabel('Income')
_ = ax[0,2].set_title('Average Information Offer Conversion Rate\nby Income')
_ = ax[0,2].legend(loc='upper left')
# Average BOGO Offer Conversion Rate by Tenure
# [1,0]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['BOGO_Conv_Rate'].mean()
_ = ax[1,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,0].set_xlim([0,70])
_ = ax[1,0].set_ylim([0,1])
_ = ax[1,0].set_xlabel('Tenure')
_ = ax[1,0].set_title('Average BOGO Offer Conversion Rate\nby Tenure')
_ = ax[1,0].legend(loc='upper left')
# # Average Discount Offer Conversion Rate by Tenure
# [1,1]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Disc_Conv_Rate'].mean()
_ = ax[1,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,1].set_xlim(0,70)
_ = ax[1,1].set_ylim(0,1)
_ = ax[1,1].set_xlabel('Tenure')
_ = ax[1,1].set_title('Average Discount Offer Conversion Rate\nby Tenure')
_ = ax[1,1].legend(loc='upper left')
# Average Information Offer Conversion Rate by Tenure
# [1,2]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Member_Tenure')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Member_Tenure')['Info_Conv_Rate'].mean()
_ = ax[1,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[1,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[1,2].set_xlim([0,70])
_ = ax[1,2].set_ylim([0,1])
_ = ax[1,2].set_xlabel('Tenure')
_ = ax[1,2].set_title('Average Information Offer Conversion Rate\nby Tenure')
_ = ax[1,2].legend(loc='upper left')
# Average BOGO Offer Conversion Rate by Age
# [2,0]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['BOGO_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['BOGO_Conv_Rate'].mean()
_ = ax[2,0].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,0].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,0].set_xlim([0,100])
_ = ax[2,0].set_ylim([0,1])
_ = ax[2,0].set_xlabel('Age')
_ = ax[2,0].set_title('Average BOGO Offer Conversion Rate\nby Age')
_ = ax[2,0].legend(loc='upper left')
# Average Discount Offer Conversion Rate by Age
# [2,1]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Disc_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Disc_Conv_Rate'].mean()
_ = ax[2,1].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,1].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,1].set_xlim([0,100])
_ = ax[2,1].set_ylim([0,1])
_ = ax[2,1].set_xlabel('Age')
_ = ax[2,1].set_title('Average Discount Offer Conversion Rate\nby Age')
_ = ax[2,1].legend(loc='upper left')
# Average Information Offer Conversion Rate by Age
# [2,2]
FeMale_avg = User_Data[User_Data['Gender_Female']==1].groupby('Age')['Info_Conv_Rate'].mean()
Male_avg = User_Data[User_Data['Gender_Male']==1].groupby('Age')['Info_Conv_Rate'].mean()
_ = ax[2,2].scatter(FeMale_avg.index, FeMale_avg, label='Female', color = 'tomato', edgecolor = 'darkred')
_ = ax[2,2].scatter(Male_avg.index, Male_avg, label='Male', color = 'steelblue', edgecolor = 'navy')
_ = ax[2,2].set_xlim([0,100])
_ = ax[2,2].set_ylim([0,1])
_ = ax[2,2].set_xlabel('Age')
_ = ax[2,2].set_title('Average Information Offer Conversion Rate\nby Age')
_ = ax[2,2].legend(loc='upper left')
_ = plt.tight_layout()